library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
library(urca)
## Warning: package 'urca' was built under R version 4.3.3
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.3.3
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.3.3
data_revenue = read_excel("D:/Quarterly_Revenue.xlsx", col_names = FALSE)
## New names:
## • `` -> `...1`
ts_revenue = ts(data_revenue, start = c(2010, 1), frequency = 4)
time = seq_along(ts_revenue)
ts_plot(ts_revenue,
title = "",
Xtitle = "Năm",
Ytitle = "Doanh thu thuần (Tỷ VND)")
#Chia tap train va val
train_revenue = ts_revenue[1:(length(ts_revenue) - 4)]
test_revenue = ts_revenue[(length(ts_revenue) - 3):length(ts_revenue)]
time_train = head(time, -4)
time_test = tail(time, 4)
ts_train = ts(train_revenue, start = c(2010, 1), frequency = 4)
#Lin-Lin
model_lin_lin = lm(ts_train ~ time_train)
summary(model_lin_lin)
##
## Call:
## lm(formula = ts_train ~ time_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -92.733 -38.407 -1.619 37.906 117.224
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 405.9065 13.0489 31.107 < 2e-16 ***
## time_train -2.3369 0.3983 -5.868 2.78e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.17 on 54 degrees of freedom
## Multiple R-squared: 0.3894, Adjusted R-squared: 0.378
## F-statistic: 34.43 on 1 and 54 DF, p-value: 2.776e-07
rmse(ts_train, fitted(model_lin_lin))
## [1] 47.30389
mape(ts_train, fitted(model_lin_lin))
## [1] 0.1198491
future_data = data.frame(time_train = 57:60)
forecast_lin_lin = predict(model_lin_lin, newdata = future_data)
forecast_lin_lin
## 1 2 3 4
## 272.7006 270.3637 268.0268 265.6898
rmse(test_revenue, forecast_lin_lin)
## [1] 21.16687
mape(test_revenue, forecast_lin_lin)
## [1] 0.0638153
#Lin-Log
model_lin_log = lm(ts_train ~ log(time_train))
summary(model_lin_log)
##
## Call:
## lm(formula = ts_train ~ log(time_train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -113.39 -44.42 10.45 42.40 115.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 435.928 26.360 16.538 < 2e-16 ***
## log(time_train) -31.395 8.229 -3.815 0.000352 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 54.71 on 54 degrees of freedom
## Multiple R-squared: 0.2123, Adjusted R-squared: 0.1977
## F-statistic: 14.56 on 1 and 54 DF, p-value: 0.0003519
rmse(ts_train, fitted(model_lin_log))
## [1] 53.7249
mape(ts_train, fitted(model_lin_log))
## [1] 0.1438712
forecast_lin_log = predict(model_lin_log, newdata = future_data)
forecast_lin_log
## 1 2 3 4
## 308.9974 308.4514 307.9147 307.3871
rmse(test_revenue, forecast_lin_log)
## [1] 33.25875
mape(test_revenue, forecast_lin_log)
## [1] 0.106548
#Log-Lin
model_log_lin = lm(log(ts_train) ~ time_train)
summary(model_log_lin)
##
## Call:
## lm(formula = log(ts_train) ~ time_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36424 -0.11519 0.01051 0.11770 0.32931
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.018104 0.040126 149.982 < 2e-16 ***
## time_train -0.007303 0.001225 -5.963 1.95e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1481 on 54 degrees of freedom
## Multiple R-squared: 0.3971, Adjusted R-squared: 0.3859
## F-statistic: 35.56 on 1 and 54 DF, p-value: 1.951e-07
rmse(ts_train, exp(fitted(model_log_lin)))
## [1] 47.93705
mape(ts_train, exp(fitted(model_log_lin)))
## [1] 0.1184165
forecast_log_lin = exp(predict(model_log_lin, newdata = future_data))
forecast_log_lin
## 1 2 3 4
## 270.9177 268.9463 266.9892 265.0464
rmse(test_revenue, forecast_log_lin)
## [1] 21.48485
mape(test_revenue, forecast_log_lin)
## [1] 0.06220432
#Log-Log
model_log_log = lm(log(ts_train) ~ log(time_train))
summary(model_log_log)
##
## Call:
## lm(formula = log(ts_train) ~ log(time_train))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42848 -0.12795 0.04048 0.13078 0.32437
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.11318 0.08126 75.228 < 2e-16 ***
## log(time_train) -0.09852 0.02537 -3.884 0.000283 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1687 on 54 degrees of freedom
## Multiple R-squared: 0.2183, Adjusted R-squared: 0.2039
## F-statistic: 15.08 on 1 and 54 DF, p-value: 0.0002827
rmse(ts_train, exp(fitted(model_log_log)))
## [1] 55.03356
mape(ts_train, exp(fitted(model_log_log)))
## [1] 0.1442767
forecast_log_log = exp(predict(model_log_log, newdata = future_data))
forecast_log_log
## 1 2 3 4
## 303.3401 302.8208 302.3113 301.8111
rmse(test_revenue, forecast_log_log)
## [1] 28.54126
mape(test_revenue, forecast_log_log)
## [1] 0.0863775
#Dat bien gia mua vu
season_q1 = c(rep(c(1, 0, 0, 0), 15))[1:56]
season_q2 = c(rep(c(0, 1, 0, 0), 15))[1:56]
season_q3 = c(rep(c(0, 0, 1, 0), 15))[1:56]
season_q4 = c(rep(c(0, 0, 0, 1), 15))[1:56]
#Mua vu tuyen tinh dang cong
model_season_add = lm(ts_train ~ time_train + season_q2 + season_q3 + season_q4)
summary(model_season_add)
##
## Call:
## lm(formula = ts_train ~ time_train + season_q2 + season_q3 +
## season_q4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -97.850 -38.062 -0.611 36.739 111.955
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 402.1532 17.2311 23.339 < 2e-16 ***
## time_train -2.3496 0.4097 -5.735 5.32e-07 ***
## season_q2 4.4924 18.6890 0.240 0.811
## season_q3 2.4849 18.7025 0.133 0.895
## season_q4 9.4773 18.7249 0.506 0.615
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.43 on 51 degrees of freedom
## Multiple R-squared: 0.3927, Adjusted R-squared: 0.345
## F-statistic: 8.243 on 4 and 51 DF, p-value: 3.308e-05
rmse(ts_train, fitted(model_season_add))
## [1] 47.17603
mape(ts_train, fitted(model_season_add))
## [1] 0.1200855
new_season_q2 = c(0,1,0,0)
new_season_q3 = c(0,0,1,0)
new_season_q4 = c(0,0,0,1)
data_forecast_add = data.frame(
time_train = 57:60,
season_q2 = new_season_q2,
season_q3 = new_season_q3,
season_q4 = new_season_q4
)
forecast_season_add = predict(model_season_add, newdata = data_forecast_add)
forecast_season_add
## 1 2 3 4
## 268.2266 270.3695 266.0124 270.6552
rmse(test_revenue, forecast_season_add)
## [1] 20.21466
mape(test_revenue, forecast_season_add)
## [1] 0.05712073
#Mua vu tuyen tinh dang nhan
interaction_q2 = time_train * season_q2
interaction_q3 = time_train * season_q3
interaction_q4 = time_train * season_q4
model_season_mul = lm(ts_train ~ time_train + interaction_q2 + interaction_q3 + interaction_q4)
summary(model_season_mul)
##
## Call:
## lm(formula = ts_train ~ time_train + interaction_q2 + interaction_q3 +
## interaction_q4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -103.664 -38.377 -1.459 39.962 108.973
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 406.11284 13.36904 30.377 < 2e-16 ***
## time_train -2.38427 0.55580 -4.290 7.98e-05 ***
## interaction_q2 -0.02652 0.58496 -0.045 0.964
## interaction_q3 -0.09690 0.57766 -0.168 0.867
## interaction_q4 0.27076 0.57085 0.474 0.637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.32 on 51 degrees of freedom
## Multiple R-squared: 0.3954, Adjusted R-squared: 0.348
## F-statistic: 8.338 on 4 and 51 DF, p-value: 2.966e-05
rmse(ts_train, fitted(model_season_mul))
## [1] 47.06931
mape(ts_train, fitted(model_season_mul))
## [1] 0.1184726
data_forecast_mul = data.frame(
time_train = 57:60,
interaction_q2 = 57:60 * new_season_q2,
interaction_q3 = 57:60 * new_season_q3,
interaction_q4 = 57:60 * new_season_q4
)
forecast_season_mul = predict(model_season_mul, newdata = data_forecast_mul)
forecast_season_mul
## 1 2 3 4
## 270.2092 266.2865 259.7236 279.3023
rmse(test_revenue, forecast_season_mul)
## [1] 20.39319
mape(test_revenue, forecast_season_mul)
## [1] 0.05296969
#HW mua vu dang cong
model_hw_add = HoltWinters(ts_train, seasonal = "additive")
rmse(ts_train, fitted(model_hw_add)[,1])
## [1] 46.4913
mape(ts_train, fitted(model_hw_add)[,1])
## [1] 0.1161673
forecast_hw_add = forecast(model_hw_add, h = 4)
forecast_hw_add$mean
## Qtr1 Qtr2 Qtr3 Qtr4
## 2024 263.0070 268.0720 267.6760 276.5073
rmse(test_revenue, forecast_hw_add$mean)
## [1] 17.84044
mape(test_revenue, forecast_hw_add$mean)
## [1] 0.05297274
#HW mua vu dang nhan
model_hw_mul = HoltWinters(ts_train, seasonal = "multiplicative")
rmse(ts_train, fitted(model_hw_mul)[,1])
## [1] 46.38345
mape(ts_train, fitted(model_hw_mul)[,1])
## [1] 0.1151952
forecast_hw_mul = forecast(model_hw_mul, h = 4)
forecast_hw_mul$mean
## Qtr1 Qtr2 Qtr3 Qtr4
## 2024 269.2018 271.5326 269.6692 277.6537
rmse(test_revenue, forecast_hw_mul$mean)
## [1] 17.12226
mape(test_revenue, forecast_hw_mul$mean)
## [1] 0.05012387
#Chon HW dang nhan la mo hinh tot nhat du bao
model_hw_full = HoltWinters(ts_revenue, seasonal = "multiplicative")
forecast_hw_2025 = forecast(model_hw_full, h = 4)
forecast_hw_2025$mean
## Qtr1 Qtr2 Qtr3 Qtr4
## 2025 280.9046 283.1359 289.1929 295.3345
#Chuoi gia va loi suat
data_price = read_excel("D:/VTO_PriceHistory.xlsx")
price = ts(as.numeric(data_price$Price), start = 1, frequency = 1)
plot(price, type = "l", col = "blue")

log_return = diff(log(price)) * 100
log_return = ts(log_return, start = 1, frequency = 1)
plot(log_return, type = "l", col = "red")

#ADF
summary(ur.df(price, type = "trend", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.00409 -0.10273 -0.02009 0.08399 1.00524
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2364029 0.0781692 3.024 0.00262 **
## z.lag.1 -0.0355467 0.0116694 -3.046 0.00244 **
## tt 0.0005649 0.0001922 2.939 0.00344 **
## z.diff.lag 0.0642412 0.0448251 1.433 0.15245
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2445 on 493 degrees of freedom
## Multiple R-squared: 0.02085, Adjusted R-squared: 0.01489
## F-statistic: 3.499 on 3 and 493 DF, p-value: 0.01547
##
##
## Value of test-statistic is: -3.0461 3.7485 4.7054
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2 6.15 4.71 4.05
## phi3 8.34 6.30 5.36
summary(ur.df(price, type = "drift", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.08029 -0.10421 -0.01303 0.08552 0.95832
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.056477 0.048989 1.153 0.250
## z.lag.1 -0.004076 0.004677 -0.872 0.384
## z.diff.lag 0.048779 0.044858 1.087 0.277
##
## Residual standard error: 0.2464 on 494 degrees of freedom
## Multiple R-squared: 0.003691, Adjusted R-squared: -0.0003427
## F-statistic: 0.915 on 2 and 494 DF, p-value: 0.4012
##
##
## Value of test-statistic is: -0.8715 1.2831
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.44 -2.87 -2.57
## phi1 6.47 4.61 3.79
summary(ur.df(price, type = "none", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.10291 -0.09768 -0.00872 0.09025 0.94131
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 0.001176 0.001058 1.112 0.267
## z.diff.lag 0.046391 0.044825 1.035 0.301
##
## Residual standard error: 0.2464 on 495 degrees of freedom
## Multiple R-squared: 0.005037, Adjusted R-squared: 0.001017
## F-statistic: 1.253 on 2 and 495 DF, p-value: 0.2866
##
##
## Value of test-statistic is: 1.1119
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
#ADF sai phan bac 1
summary(ur.df(diff(price), type = "trend", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.07582 -0.09841 -0.00927 0.08457 0.97624
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.465e-03 2.227e-02 0.335 0.738
## z.lag.1 -9.975e-01 6.210e-02 -16.063 <2e-16 ***
## tt 3.159e-05 7.738e-05 0.408 0.683
## z.diff.lag 4.374e-02 4.491e-02 0.974 0.330
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2467 on 492 degrees of freedom
## Multiple R-squared: 0.4791, Adjusted R-squared: 0.4759
## F-statistic: 150.8 on 3 and 492 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -16.0628 86.0108 129.015
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2 6.15 4.71 4.05
## phi3 8.34 6.30 5.36
summary(ur.df(diff(price), type = "drift", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.07192 -0.10156 -0.01263 0.08476 0.97982
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01534 0.01111 1.380 0.168
## z.lag.1 -0.99705 0.06204 -16.072 <2e-16 ***
## z.diff.lag 0.04361 0.04487 0.972 0.332
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2465 on 493 degrees of freedom
## Multiple R-squared: 0.4789, Adjusted R-squared: 0.4768
## F-statistic: 226.5 on 2 and 493 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -16.0717 129.1513
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.44 -2.87 -2.57
## phi1 6.47 4.61 3.79
summary(ur.df(diff(price), type = "none", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.06190 -0.08668 0.00307 0.10000 0.98850
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -0.98943 0.06185 -15.998 <2e-16 ***
## z.diff.lag 0.03974 0.04482 0.887 0.376
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2467 on 494 degrees of freedom
## Multiple R-squared: 0.4769, Adjusted R-squared: 0.4748
## F-statistic: 225.2 on 2 and 494 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -15.9978
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
#Kiem tra bac AR,MA
Acf(diff(price))

Pacf(diff(price))

model_arima_5_13 = arima(price, c(5, 1, 13))
summary(model_arima_5_13)
##
## Call:
## arima(x = price, order = c(5, 1, 13))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 ma3
## 0.4001 -0.3227 -0.1816 0.2864 -0.8311 -0.3567 0.2578 0.1894
## s.e. 0.1341 0.1760 0.2064 0.1690 0.1206 0.1405 0.1742 0.1963
## ma4 ma5 ma6 ma7 ma8 ma9 ma10 ma11
## -0.2968 0.6297 0.1674 -0.1924 0.0026 -0.0083 -0.1977 0.0334
## s.e. 0.1603 0.1154 0.0613 0.0706 0.0798 0.0709 0.0699 0.0555
## ma12 ma13
## -0.0399 0.1090
## s.e. 0.0518 0.0508
##
## sigma^2 estimated as 0.05444: log likelihood = 13.62, aic = 10.77
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02131319 0.2330877 0.15424 0.1855524 1.439599 1.00028
## ACF1
## Training set -0.01195235
autoplot(model_arima_5_13)

checkresiduals(model_arima_5_13)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,1,13)
## Q* = 8.0318, df = 3, p-value = 0.04536
##
## Model df: 18. Total lags used: 21
model_arima_5_5 = arima(price, c(5, 1, 5))
summary(model_arima_5_5)
##
## Call:
## arima(x = price, order = c(5, 1, 5))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 ma3
## -0.0932 0.2145 -0.1939 -0.0292 0.1493 0.1511 -0.2674 0.1483
## s.e. 0.2087 0.2406 0.1913 0.2298 0.1835 0.1981 0.2354 0.1870
## ma4 ma5
## 0.0305 -0.3767
## s.e. 0.2285 0.1755
##
## sigma^2 estimated as 0.05741: log likelihood = 4.7, aic = 12.59
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02311008 0.2393676 0.1551251 0.2014103 1.444021 1.00602
## ACF1
## Training set -0.007405784
autoplot(model_arima_5_5)

checkresiduals(model_arima_5_5)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,1,5)
## Q* = 4.0782, df = 3, p-value = 0.2531
##
## Model df: 10. Total lags used: 13
model_arima_13_5 = arima(price, c(13, 1, 5))
summary(model_arima_13_5)
##
## Call:
## arima(x = price, order = c(13, 1, 5))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.1443 -0.2147 -0.1314 -0.0451 -0.7586 0.0782 -0.1469 0.0219
## s.e. 0.2214 0.1892 0.1623 0.2239 0.1471 0.0779 0.0690 0.0691
## ar9 ar10 ar11 ar12 ar13 ma1 ma2 ma3 ma4
## -0.0816 -0.1398 0.0177 -0.087 0.1525 -0.0988 0.1520 0.1265 0.0431
## s.e. 0.0695 0.0604 0.0510 0.050 0.0532 0.2231 0.1857 0.1610 0.2230
## ma5
## 0.5531
## s.e. 0.1395
##
## sigma^2 estimated as 0.0555: log likelihood = 12.86, aic = 12.27
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02076937 0.2353462 0.1559738 0.1801014 1.454261 1.011524
## ACF1
## Training set -0.004320014
autoplot(model_arima_13_5)

checkresiduals(model_arima_13_5)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(13,1,5)
## Q* = 6.3139, df = 3, p-value = 0.0973
##
## Model df: 18. Total lags used: 21
#So sanh voi thuc te
fc_price = forecast(model_arima_5_5, h = 10)$mean
price_real = c(14.8, 14.45, 13.9, 13.8, 14.15, 14.05, 13.8, 13.85, 13.8, 14.0)
rmse(price_real, fc_price)
## [1] 0.801461
mape(price_real, fc_price)
## [1] 0.05322225
df_plot_price = data.frame(tt = seq_along(price_real), price_real, fc_price)
ggplot(df_plot_price, aes(x = tt)) +
geom_line(aes(y = price_real, color = "Giá thực tế"), linewidth = 2) +
geom_line(aes(y = fc_price, color = "Giá dự báo"), linewidth = 2) +
labs(title = "So sánh giá cổ phiếu dự báo và thực tế", x = "Time", y = "Price") +
scale_color_manual(values = c("Giá thực tế" = "blue", "Giá dự báo" = "red")) +
theme_minimal()

#ADF chuoi loi suat
summary(ur.df(log_return, type = "trend", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5193 -1.0200 -0.1129 0.9192 6.7045
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1260676 0.1917304 0.658 0.511
## z.lag.1 -1.0235062 0.0618330 -16.553 <2e-16 ***
## tt 0.0001022 0.0006656 0.154 0.878
## z.diff.lag 0.0655977 0.0445370 1.473 0.141
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.122 on 492 degrees of freedom
## Multiple R-squared: 0.4832, Adjusted R-squared: 0.4801
## F-statistic: 153.4 on 3 and 492 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -16.5527 91.3475 137.0164
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2 6.15 4.71 4.05
## phi3 8.34 6.30 5.36
summary(ur.df(log_return, type = "drift", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5049 -1.0182 -0.1156 0.9204 6.7175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.15158 0.09569 1.584 0.114
## z.lag.1 -1.02353 0.06177 -16.570 <2e-16 ***
## z.diff.lag 0.06566 0.04449 1.476 0.141
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.12 on 493 degrees of freedom
## Multiple R-squared: 0.4832, Adjusted R-squared: 0.4811
## F-statistic: 230.5 on 2 and 493 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -16.5696 137.2814
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.44 -2.87 -2.57
## phi1 6.47 4.61 3.79
summary(ur.df(log_return, type = "none", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3253 -0.8714 0.0410 1.0683 6.8047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -1.01360 0.06155 -16.469 <2e-16 ***
## z.diff.lag 0.06055 0.04444 1.362 0.174
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.123 on 494 degrees of freedom
## Multiple R-squared: 0.4806, Adjusted R-squared: 0.4785
## F-statistic: 228.5 on 2 and 494 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -16.4689
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
#Kiem tra bac AR, MA
Acf(log_return)

Pacf(log_return)

model_ls_5_5 = arima(log_return, c(5, 0, 5))
## Warning in arima(log_return, c(5, 0, 5)): possible convergence problem: optim
## gave code = 1
summary(model_ls_5_5)
##
## Call:
## arima(x = log_return, order = c(5, 0, 5))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 ma3 ma4
## 0.0169 0.3053 -0.2824 0.0999 0.1873 0.0320 -0.3961 0.2187 -0.0664
## s.e. 0.4458 0.3576 0.6190 0.2565 0.3401 0.4376 0.3435 0.6454 0.2598
## ma5 intercept
## -0.3636 0.1628
## s.e. 0.3854 0.0595
##
## sigma^2 estimated as 4.366: log likelihood = -1073.74, aic = 2171.48
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.003570057 2.089456 1.433722 NaN Inf 0.6801143 0.0004672621
autoplot(model_ls_5_5)

checkresiduals(model_ls_5_5)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,5) with non-zero mean
## Q* = 2.8165, df = 3, p-value = 0.4208
##
## Model df: 10. Total lags used: 13
model_ls_18_5 = arima(log_return, c(18, 0, 5))
summary(model_ls_18_5)
##
## Call:
## arima(x = log_return, order = c(18, 0, 5))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.5553 -0.3288 0.1875 0.4344 -0.8178 0.1805 -0.1834 0.0672
## s.e. 0.1725 0.1881 0.1911 0.1947 0.1525 0.0752 0.0759 0.0840
## ar9 ar10 ar11 ar12 ar13 ar14 ar15 ar16
## -0.0473 -0.0635 -0.0019 -0.0865 0.1265 -0.1022 0.0270 -0.0747
## s.e. 0.0856 0.0834 0.0778 0.0735 0.0715 0.0679 0.0656 0.0629
## ar17 ar18 ma1 ma2 ma3 ma4 ma5 intercept
## -0.0483 -0.0481 -0.5128 0.2334 -0.1905 -0.4630 0.6710 0.1634
## s.e. 0.0608 0.0576 0.1681 0.1802 0.1717 0.1761 0.1385 0.0553
##
## sigma^2 estimated as 4.126: log likelihood = -1062.17, aic = 2174.34
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.003655175 2.031373 1.429586 NaN Inf 0.6781525 -0.0008185149
autoplot(model_ls_18_5)

checkresiduals(model_ls_18_5)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(18,0,5) with non-zero mean
## Q* = 5.0913, df = 3, p-value = 0.1652
##
## Model df: 23. Total lags used: 26
model_ls_5_18 = arima(log_return, c(5, 0, 18))
summary(model_ls_5_18)
##
## Call:
## arima(x = log_return, order = c(5, 0, 18))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 ma3
## 0.4585 -0.1509 -0.0217 0.4310 -0.7173 -0.4125 0.0653 0.0077
## s.e. 0.1700 0.1478 0.1446 0.1343 0.1417 0.1760 0.1536 0.1414
## ma4 ma5 ma6 ma7 ma8 ma9 ma10 ma11
## -0.4360 0.5706 0.1536 -0.1336 0.0075 -0.0057 -0.0704 -0.0018
## s.e. 0.1326 0.1484 0.0634 0.0622 0.0646 0.0640 0.0638 0.0623
## ma12 ma13 ma14 ma15 ma16 ma17 ma18 intercept
## -0.0412 0.0992 -0.0508 -0.0172 -0.0319 -0.0253 -0.0641 0.1618
## s.e. 0.0624 0.0669 0.0684 0.0543 0.0525 0.0615 0.0715 0.0567
##
## sigma^2 estimated as 4.158: log likelihood = -1063.98, aic = 2177.96
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.00411781 2.039021 1.429853 NaN Inf 0.6782792 -0.002195243
autoplot(model_ls_5_18)

checkresiduals(model_ls_5_18)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,18) with non-zero mean
## Q* = 6.147, df = 3, p-value = 0.1047
##
## Model df: 23. Total lags used: 26
model_ls_auto = auto.arima(log_return)
summary(model_ls_auto)
## Series: log_return
## ARIMA(5,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 mean
## 0.0460 -0.0701 -0.0401 0.0075 -0.1583 0.1647
## s.e. 0.0446 0.0446 0.0451 0.0450 0.0450 0.0777
##
## sigma^2 = 4.481: log likelihood = -1077.16
## AIC=2168.33 AICc=2168.56 BIC=2197.8
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.001733249 2.104128 1.449586 NaN Inf 0.6876398 0.002258355
autoplot(model_ls_auto)

checkresiduals(model_ls_auto)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,0) with non-zero mean
## Q* = 6.0777, df = 5, p-value = 0.2987
##
## Model df: 5. Total lags used: 10
#So sanh voi loi suat thuc te
log_return_real = c(0.677968699, -2.393276621, -3.880557442, -0.722024797, 2.504603193,
-0.709222831, -1.795380362, 0.361664047, -0.361664047, 1.438873745)
fc_log_return = forecast(model_ls_5_5, h = 10)$mean
rmse(log_return_real, fc_log_return)
## [1] 1.944191
smape(log_return_real, fc_log_return)
## [1] 1.44142
df_plot_return = data.frame(tl = seq_along(log_return_real), log_return_real, fc_log_return)
ggplot(df_plot_return, aes(x = tl)) +
geom_line(aes(y = log_return_real, color = "Lợi suất thực tế"), linewidth = 1) +
geom_line(aes(y = fc_log_return, color = "Lợi suất dự báo"), linewidth = 1) +
labs(title = "So sánh chuỗi log return dự báo và thực tế", x = "Time", y = "Log return") +
scale_color_manual(values = c("Lợi suất thực tế" = "blue", "Lợi suất dự báo" = "red")) +
theme_minimal()
